import os
import sys
from collections import Counter

import imageio
import jieba
import wordcloud
# Path of the input text file. main() assigns it; worcloutopng() and
# sumwords() read it to build the names of the files they write.
name = ''
def readtxt(filename):
    """Read a whole text file, trying UTF-8 first and GBK as a fallback.

    Args:
        filename: Path of the file to read.

    Returns:
        The decoded file content as a single string. The name of the
        encoding that decoded it ('utf-8' or 'gbk') is printed to stdout.

    Raises:
        SystemExit: With status 1, after printing '打开文件失败！', when the
            file cannot be opened or cannot be decoded with either encoding.
    """
    for encoding in ('utf-8', 'gbk'):
        try:
            with open(filename, 'r', encoding=encoding) as f:
                text = f.read()
        except (OSError, ValueError):
            # OSError: the file is missing or unreadable. ValueError covers
            # UnicodeDecodeError (wrong encoding) and malformed paths.
            # Either way, fall through to the next candidate encoding.
            continue
        # Report the encoding only after the read succeeded, so a GBK file is
        # not announced as 'utf-8' first.
        print(encoding)
        return text

    print('打开文件失败！')
    # sys.exit is always available (the bare exit() helper is only injected by
    # the site module); a non-zero status signals the failure to the caller.
    sys.exit(1)
    

def cutandexp(s):
    """Segment text with jieba and filter out short and excluded tokens.

    Args:
        s: The text to segment.

    Returns:
        A tuple ``(joined, words)``: ``words`` is the list of kept tokens in
        their original order, and ``joined`` is the same tokens joined by
        single spaces.
    """
    # Tokens to exclude from the result. A set gives constant-time membership
    # tests, which matters because every token of the text is checked.
    exp = {
        '我们', '他们', '这样', '看到', '没有', '这个', '那个', '什么', '一个', '还是', '就是', '以后', '突然', '很快', '起来', '只有',
        '这时', '现在', '由于', '然后', '所有', '那些', '这是', '你们', '一样', '知道', '因为', '那里', '怎么', '自己', '已经', '这种', '可能',
        '不是', '这些', '只是', '可以', '这里', '它们', '还有',
    }
    # Add custom entries to jieba's dictionary before segmenting.
    for adword in ('孙行者', '沙和尚', '唐三藏'):
        jieba.add_word(adword)

    # Keep tokens longer than one character that are not in the exclusion set.
    words = [
        token
        for token in jieba.lcut(s)
        if len(token) > 1 and token not in exp
    ]
    return ' '.join(words), words

def worcloutopng(words, size=(600, 400)):
    """Generate a word cloud from text and save it as a PNG file.

    Args:
        words: The text handed to ``WordCloud.generate``; main() passes the
            space-joined tokens produced by cutandexp().
        size: ``(width, height)`` forwarded to the ``WordCloud`` constructor.

    The image is written to '<stem>词云.png', where ``<stem>`` is the
    module-level ``name`` with its file extension removed.
    """
    wc = wordcloud.WordCloud(
        width=size[0],
        height=size[1],
        # NOTE(review): hard-coded font file; presumably it must be
        # resolvable on the machine running the script — confirm.
        font_path='PingFang-W6.ttc',
        collocations=False,
        max_words=200,
    )
    wc.generate(words)
    # splitext strips whatever extension is present (or none), instead of
    # blindly cutting the last four characters off the input path.
    stem = os.path.splitext(name)[0]
    wc.to_file('{}词云.png'.format(stem))

def sumwords(words):
    """Count word frequencies and write them to a CSV file.

    Args:
        words: Iterable of words to count.

    One ``word,count`` line is written per distinct word, ordered by
    descending count; words with equal counts keep their first-seen order.
    The file is '<stem>分词.csv', where ``<stem>`` is the module-level
    ``name`` with its file extension removed.
    """
    # most_common() sorts by count, descending, and is stable for ties.
    ranked = Counter(words).most_common()
    # splitext strips whatever extension is present (or none), instead of
    # blindly cutting the last four characters off the input path.
    stem = os.path.splitext(name)[0]
    # Explicit UTF-8 so the output does not depend on the platform's locale
    # encoding, which may be unable to represent some words.
    with open('{}分词.csv'.format(stem), mode='w', encoding='utf-8') as f:
        f.writelines('{},{}\n'.format(word, count) for word, count in ranked)

def main():
    """Run the full pipeline: read, segment, count, and draw the word cloud.

    The input path is the first command-line argument when one is given;
    otherwise the user is prompted for it. The path is stored in the
    module-level ``name`` so the output functions can derive their file names.
    """
    global name
    name = sys.argv[1] if len(sys.argv) > 1 else input('please input filename:')

    text = readtxt(name)
    joined, tokens = cutandexp(text)
    print("cut finish!")

    sumwords(tokens)
    print("sum finish!")

    worcloutopng(words=joined, size=(1920, 1080))
    print('OK!')

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()